Correlation
Matrices
RAW
par(cex=1.0,cex.main=0.8)
breaks <- c(0:5)/5.0;
cormat <- cor(testDataFrame,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(max(abs(cormat)))
1
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
pander::pander(c(Raw_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Raw Correlation",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature"
)

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Raw Correlation",xlab="Spearman Correlation")
rawDen <- density(cormat,from=-1,to=1)
par(op)
UPSTM Blind
par(cex=1.0,cex.main=0.8)
## Train Correlation
cormat <- cor(DEdataframe,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])
pander::pander(c(Train=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.456 |
0.539 |
0.603 |
0.672 |
0.777 |
pander::pander(c(IDeA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
## Test Correlation
cormat <- cor(predTestDe,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Test=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.455 |
0.539 |
0.613 |
0.704 |
0.862 |
pander::pander(c(IDeA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Correlation after IDeA",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Correlation after UPSTM",xlab="Spearman Correlation")
DeDen <- density(cormat,from=-1,to=1)
par(op)
UPSTM
Blind/Spearman
par(cex=1.0,cex.main=0.8)
## Train Correlation
cormat <- cor(DEdataframeSpear,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])
pander::pander(c(Train=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.501 |
0.595 |
0.675 |
0.753 |
0.879 |
pander::pander(c(IDeA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
## Test Correlation
cormat <- cor(predTestDeSpear,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Test=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.473 |
0.562 |
0.634 |
0.72 |
0.859 |
pander::pander(c(IDeA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Correlation after IDeA:Spearman",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Correlation after UPSTM",xlab="Spearman Correlation")
DeSpearDen <- density(cormat,from=-1,to=1)
par(op)
UPSTM Driven
par(cex=1.0,cex.main=0.8)
## Train Correlation
cormat <- cor(DriDEdataframe,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])
pander::pander(c(Train=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.456 |
0.539 |
0.603 |
0.672 |
0.777 |
pander::pander(c(IDeA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
## Test Correlation
cormat <- cor(DriDEdataframe,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Test=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.418 |
0.498 |
0.563 |
0.639 |
0.788 |
pander::pander(c(IDeA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Correlation after Driven-IDeA",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Correlation after Driven-UPSTM",xlab="Spearman Correlation")
DeDrivDen <- density(cormat,from=-1,to=1)
par(op)
UPSTM
Spearman
par(cex=1.0,cex.main=0.8)
## Train Correlation
cormat <- cor(DriDEdataframeSpear,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
#pander::pander(colnames(cormat)[apply(abs(cormat),2,max)>rhoThreshold])
pander::pander(c(Train=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.501 |
0.595 |
0.675 |
0.753 |
0.879 |
pander::pander(c(IDeAS_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
## Test Correlation
cormat <- cor(predTestDriSpear,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Test=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.473 |
0.562 |
0.634 |
0.72 |
0.859 |
pander::pander(c(IDeAS_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Correlation: Driven/Spearman",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Correlation after UPSTM with Spearman",xlab="Spearman Correlation")
DeDrivSpearDen <- density(cormat,from=-1,to=1)
par(op)
PCA
par(cex=1.0,cex.main=0.8)
## Train Correlation
cormat <- cor(PCA_Train,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Train=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.995 |
0.997 |
0.998 |
0.999 |
0.999 |
pander::pander(c(PCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
## Test Correlation
cormat <- cor(PCA_Predicted,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Test=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.992 |
0.995 |
0.996 |
0.998 |
0.999 |
pander::pander(c(PCA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Correlation after PCA",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Correlation after PCA",xlab="Spearman Correlation")
PCADen <- density(cormat,from=-1,to=1)
par(op)
EFA
par(cex=1.0,cex.main=0.8)
## Train Correlation
cormat <- cor(EFA_Train,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Train=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
pander::pander(c(EFA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
## Test Correlation
cormat <- cor(EFA_Predicted,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Test=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
pander::pander(c(EFA_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Correlation after EFA",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Correlation after EFA",xlab="Spearman Correlation")
EFADen <- density(cormat,from=-1,to=1)
par(op)
PCA Whitening
## Train Correlation
cormat <- cor(PCAWhite_Train,method="pearson")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Train=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 9.48e-11 |
0.0465 |
0.108 |
0.301 |
0.46 |
pander::pander(c(PCAWhite_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
## Test Correlation
cormat <- cor(PCAWhitePredicted,method="spearman")
cormat[is.na(cormat)] <- 0
diag(cormat) <- 0;
pander::pander(c(Test=max(abs(cormat))))
pander::pander(quantile(abs(cormat),probs=c(0.9,0.95,0.975,0.99,0.999)))
| 0.462 |
0.538 |
0.57 |
0.653 |
0.716 |
pander::pander(c(PCAWhite_fraction=sum(abs(cormat)>rhoThreshold)/ncol(cormat)/ncol(cormat)))
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
breaks = breaks,
main = "Correlation after PCAWhite",
cexRow = 0.25,
cexCol = 0.25,
srtCol=35,
srtRow=75,
key.title=NA,
key.xlab="Spearman Correlation",
xlab="Feature", ylab="Feature")

#hist(cormat,freq=FALSE,
# density=NULL,
# xlim=c(-1,1),
# ylim=c(0,4.0),
# main="Correlation after PCAWhite",xlab="Spearman Correlation")
WhiteDen <- density(cormat,from=-1,to=1)
par(op)
The Density
Plot
par(cex=0.7)
colors=c("red","blue","green","darkblue","darkgreen","purple","orange","darkred");
plot(rawDen,
xlim=c(-1,1),
ylim=c(0.001,7.0),
col=colors[1],
lty=1,
lwd=4,
log="y",
main="Test: Correlation Distribution",xlab="Spearman Correlation")
lines(DeDen,col=colors[2],lty=2,lwd=4)
lines(DeSpearDen,col=colors[3],lty=3,lwd=4)
lines(DeDrivDen,col=colors[4],lty=4,lwd=2)
lines(DeDrivSpearDen,col=colors[5],lty=5,lwd=2)
lines(PCADen,col=colors[6],lty=6,lwd=1)
lines(EFADen,col=colors[7],lty=7,lwd=1)
lines(WhiteDen,col=colors[8],lty=8,lwd=1)
names=c("Raw","IDeA:P","IDeA:S","DIDeA:P","DIDeA:S","PCA","EFA","White:PCA")
#colors=c("red","blue","green","blue","green","purple","purple","gray");
lines=c(1,2,3,4,5,6,7,8)
lwds=c(4,4,4,2,2,1,1,1)
legend("topleft",names,col=colors,lty=lines,lwd=lwds,cex=0.50)

par(op)
Differences
between train and test ROC AUC
par(op)
par(mfrow=c(1,1),cex=0.7)
AUCResults <- list();
diffAUC <- list();
thenames <- rownames(univar$orderframe)[(rownames(univar$orderframe) %in% colnames(testDataFrame))]
rawAUC <- univar$orderframe[thenames,"ROCAUC"]
thenames <- thenames[rawAUC >= aucTHR]
rawAUC <- univar$orderframe[thenames,"ROCAUC"]
rawAUCTest <- univarTest$orderframe[thenames,"ROCAUC"]
AUCResults$RAW_T <- rawAUCTest
diffAUC$RAW <- rawAUCTest-rawAUC
plot(rawAUC,rawAUCTest-rawAUC,
xlab="TRAIN ROC AUC",
ylab="Test:AUC-Train:AUC",
xlim=c(0.5,1.0),
ylim=c(-0.25,0.25),
pch=1,
col=colors[1],
main="ROC AUC Difference Between Test and Train")
thenames <- rownames(univarDe$orderframe)[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
IDeAP <- univarDe$orderframe[thenames,"ROCAUC"]
thenames <- thenames[IDeAP >= aucTHR]
IDeAP <- univarDe$orderframe[thenames,"ROCAUC"]
IDeAPTest <- univarDeTest$orderframe[thenames,"ROCAUC"]
AUCResults$IDeAP <- IDeAP
AUCResults$IDeAP_T <- IDeAPTest
diffAUC$IDeAP <- IDeAPTest-IDeAP
points(IDeAP,IDeAPTest-IDeAP,pch=2,col=colors[2])
thenames <- rownames(univarDeSpear$orderframe)[!(rownames(univarDeSpear$orderframe) %in% colnames(testDataFrame))]
IDeAS <- univarDeSpear$orderframe[thenames,"ROCAUC"]
thenames <- thenames[IDeAS >= aucTHR]
IDeAS <- univarDeSpear$orderframe[thenames,"ROCAUC"]
IDeASTest <- univarDeSpearTest$orderframe[thenames,"ROCAUC"]
AUCResults$IDeAS <- IDeAS
AUCResults$IDeAS_T <- IDeASTest
diffAUC$IDeAS <- IDeASTest-IDeAS
points(IDeAS,IDeASTest-IDeAS,pch=3,col=colors[3])
thenames <- rownames(univarDeDri$orderframe)[!(rownames(univarDeDri$orderframe) %in% colnames(testDataFrame))]
DIDeAP <- univarDeDri$orderframe[thenames,"ROCAUC"]
thenames <- thenames[DIDeAP >= aucTHR]
DIDeAP <- univarDeDri$orderframe[thenames,"ROCAUC"]
DIDeAPTest <- univarDeDriTest$orderframe[thenames,"ROCAUC"]
AUCResults$DIDeAP <- DIDeAP
AUCResults$DIDeAP_T <- DIDeAPTest
diffAUC$DIDeAP <- DIDeAPTest-DIDeAP
points(DIDeAP,DIDeAPTest-DIDeAP,pch=4,col=colors[4])
thenames <- rownames(univarDeDriSpear$orderframe)[!(rownames(univarDeDriSpear$orderframe) %in% colnames(testDataFrame))]
DIDeAS <- univarDeDriSpear$orderframe[thenames,"ROCAUC"]
thenames <- thenames[DIDeAS >= aucTHR]
DIDeAS <- univarDeDriSpear$orderframe[thenames,"ROCAUC"]
DIDeASTest <- univarDeDriSpearTest$orderframe[thenames,"ROCAUC"]
AUCResults$DIDeAS <- DIDeAS
AUCResults$DIDeAS_T <- DIDeASTest
diffAUC$DIDeAS <- DIDeASTest-DIDeAS
points(DIDeAS,DIDeASTest-DIDeAS,pch=5,col=colors[5])
thenames <- rownames(univarPCA$orderframe)[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
PCA <- univarPCA$orderframe[thenames,"ROCAUC"]
thenames <- thenames[PCA >= aucTHR]
PCA <- univarPCA$orderframe[thenames,"ROCAUC"]
PCATest <- univarPCATest$orderframe[thenames,"ROCAUC"]
AUCResults$PCA <- PCA
AUCResults$PCA_T <- PCATest
diffAUC$PCA <- PCATest-PCA
points(PCA,PCATest-PCA,pch=6,col=colors[6])
thenames <- rownames(univarEFA$orderframe)[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
EFA <- univarEFA$orderframe[thenames,"ROCAUC"]
thenames <- thenames[EFA >= aucTHR]
EFA <- univarEFA$orderframe[thenames,"ROCAUC"]
EFATest <- univarEFATest$orderframe[thenames,"ROCAUC"]
AUCResults$EFA <- EFA
AUCResults$EFA_T <- EFATest
diffAUC$EFA <- EFATest-EFA
points(EFA,EFATest-EFA,pch=7,col=colors[7])
thenames <- rownames(univarWhite$orderframe)[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
WPCA <- univarWhite$orderframe[thenames,"ROCAUC"]
thenames <- thenames[WPCA >= aucTHR]
WPCA <- univarWhite$orderframe[thenames,"ROCAUC"]
WPCATest <- univarWhiteTest$orderframe[thenames,"ROCAUC"]
AUCResults$WPCA <- WPCA
AUCResults$WPCA_T <- WPCATest
diffAUC$WPCA <- WPCATest-WPCA
points(WPCA,WPCATest-WPCA,pch=8,col=colors[8])
names=c("Raw","IDeA:P","IDeA:S","DIDeA:P","DIDeA:S","PCA","EFA","White:PCA")
pchs=c(1,2,3,4,5,6,7,8)
legend("bottomright",names,col=colors,pch=pchs,cex=0.50)

par(op)
Violin of
differences
par(op)
par(mfrow=c(1,1),cex=0.7)
vioplot(diffAUC,
ylim=c(-0.25,0.25),
ylab="Test-Train",
main="Test-Train Paired ROC AUC Difference",
col=colors,
cex.axis=0.6,
las=2
)
stripchart(diffAUC, method = "jitter", col = "gray",
vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(diffAUC),lapply(diffAUC,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)
Distribution of
ROC AUC in latent Variables
par(op)
par(mfrow=c(1,1),cex=0.7)
colors2 <- length(AUCResults)
colors2[1] <- colors[1];
colors2[2*(1:(length(colors)-1))] <- colors[2:length(colors)]
colors2[1+2*(1:(length(colors)-1))] <- colors[2:length(colors)]
vioplot(AUCResults,
ylim=c(0.3,1.0),
ylab="ROC AUC",
main="ROC AUC of Latent Variables",
col=colors2,
cex.axis=0.6,
las=2
)
abline(h=0.5,col="black")
stripchart(AUCResults, method = "jitter", col = "gray",
vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(AUCResults),lapply(AUCResults,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)
Differences
between train and test Balanced Accuracy
par(op)
par(mfrow=c(1,1),cex=0.7)
BACCTHR <- 0.5
BACCResults <- list();
diffBACC <- list();
thenames <- rownames(univar$orderframe)[(rownames(univar$orderframe) %in% colnames(testDataFrame))]
rawBACC <- univar$orderframe[thenames,"BACC"]
thenames <- thenames[rawBACC >= BACCTHR]
rawBACC <- univar$orderframe[thenames,"BACC"]
rawBACCTest <- univarTest$orderframe[thenames,"BACC"]
BACCResults$RAW <- rawBACCTest
diffBACC$RAW <- rawBACCTest-rawBACC
plot(rawBACC,rawBACCTest-rawBACC,
xlab="TRAIN Balanced Acc",
ylab="Test:BACC-Train:BACC",
xlim=c(0.5,1.0),
ylim=c(-0.25,0.25),
pch=1,
col=colors[1],
main="Balanced Acc Difference Between Test and Train")
thenames <- rownames(univarDe$orderframe)[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
IDeAP <- univarDe$orderframe[thenames,"BACC"]
thenames <- thenames[IDeAP >= BACCTHR]
IDeAP <- univarDe$orderframe[thenames,"BACC"]
IDeAPTest <- univarDeTest$orderframe[thenames,"BACC"]
BACCResults$IDeAP <- IDeAP
BACCResults$IDeAP_T <- IDeAPTest
diffBACC$IDeAP <- IDeAPTest-IDeAP
points(IDeAP,IDeAPTest-IDeAP,pch=2,col=colors[2])
thenames <- rownames(univarDeSpear$orderframe)[!(rownames(univarDeSpear$orderframe) %in% colnames(testDataFrame))]
IDeAS <- univarDeSpear$orderframe[thenames,"BACC"]
thenames <- thenames[IDeAS >= BACCTHR]
IDeAS <- univarDeSpear$orderframe[thenames,"BACC"]
IDeASTest <- univarDeSpearTest$orderframe[thenames,"BACC"]
BACCResults$IDeAS <- IDeAS
BACCResults$IDeAS_T <- IDeASTest
diffBACC$IDeAS <- IDeASTest-IDeAS
points(IDeAS,IDeASTest-IDeAS,pch=3,col=colors[3])
thenames <- rownames(univarDeDri$orderframe)[!(rownames(univarDeDri$orderframe) %in% colnames(testDataFrame))]
DIDeAP <- univarDeDri$orderframe[thenames,"BACC"]
thenames <- thenames[DIDeAP >= BACCTHR]
DIDeAP <- univarDeDri$orderframe[thenames,"BACC"]
DIDeAPTest <- univarDeDriTest$orderframe[thenames,"BACC"]
BACCResults$DIDeAP <- DIDeAP
BACCResults$DIDeAP_T <- DIDeAPTest
diffBACC$DIDeAP <- DIDeAPTest-DIDeAP
points(DIDeAP,DIDeAPTest-DIDeAP,pch=4,col=colors[4])
thenames <- rownames(univarDeDriSpear$orderframe)[!(rownames(univarDeDriSpear$orderframe) %in% colnames(testDataFrame))]
DIDeAS <- univarDeDriSpear$orderframe[thenames,"BACC"]
thenames <- thenames[DIDeAS >= BACCTHR]
DIDeAS <- univarDeDriSpear$orderframe[thenames,"BACC"]
DIDeASTest <- univarDeDriSpearTest$orderframe[thenames,"BACC"]
BACCResults$DIDeAS <- DIDeAS
BACCResults$DIDeAS_T <- DIDeASTest
diffBACC$DIDeAS <- DIDeASTest-DIDeAS
points(DIDeAS,DIDeASTest-DIDeAS,pch=5,col=colors[5])
thenames <- rownames(univarPCA$orderframe)[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
PCA <- univarPCA$orderframe[thenames,"BACC"]
thenames <- thenames[PCA >= BACCTHR]
PCA <- univarPCA$orderframe[thenames,"BACC"]
PCATest <- univarPCATest$orderframe[thenames,"BACC"]
BACCResults$PCA <- PCA
BACCResults$PCA_T <- PCATest
diffBACC$PCA <- PCATest-PCA
points(PCA,PCATest-PCA,pch=6,col=colors[6])
thenames <- rownames(univarEFA$orderframe)[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
EFA <- univarEFA$orderframe[thenames,"BACC"]
thenames <- thenames[EFA >= BACCTHR]
EFA <- univarEFA$orderframe[thenames,"BACC"]
EFATest <- univarEFATest$orderframe[thenames,"BACC"]
BACCResults$EFA <- EFA
BACCResults$EFA_T <- EFATest
diffBACC$EFA <- EFATest-EFA
points(EFA,EFATest-EFA,pch=7,col=colors[7])
thenames <- rownames(univarWhite$orderframe)[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
WPCA <- univarWhite$orderframe[thenames,"BACC"]
thenames <- thenames[WPCA >= BACCTHR]
WPCA <- univarWhite$orderframe[thenames,"BACC"]
WPCATest <- univarWhiteTest$orderframe[thenames,"BACC"]
BACCResults$WPCA <- WPCA
BACCResults$WPCA_T <- WPCATest
diffBACC$WPCA <- WPCATest-WPCA
points(WPCA,WPCATest-WPCA,pch=8,col=colors[8])
names=c("Raw","IDeA:P","IDeA:S","DIDeA:P","DIDeA:S","PCA","EFA","White:PCA")
pchs=c(1,2,3,4,5,6,7,8)
legend("bottomright",names,col=colors,pch=pchs,cex=0.50)

par(op)
Violin of
differences
par(op)
par(mfrow=c(1,1),cex=0.7)
vioplot(diffBACC,
ylim=c(-0.25,0.25),
ylab="Test-Train",
main="Test-Train Paired Balanced Acc Difference",
col=colors,
cex.axis=0.6,
las=2
)
stripchart(diffBACC, method = "jitter", col = "gray",
vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(diffBACC),lapply(diffBACC,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)
Distribution of
Balanced Acc in latent Variables
par(op)
par(mfrow=c(1,1),cex=0.7)
colors2 <- length(BACCResults)
colors2[1] <- colors[1];
colors2[2*(1:(length(colors)-1))] <- colors[2:length(colors)]
colors2[1+2*(1:(length(colors)-1))] <- colors[2:length(colors)]
vioplot(BACCResults,
ylim=c(0.3,1.0),
ylab="Balanced Acc",
main="Balanced Acc of Latent Variables",
col=colors2,
cex.axis=0.6,
las=2
)
abline(h=0.5,col="black")
stripchart(BACCResults, method = "jitter", col = "gray",
vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(BACCResults),lapply(BACCResults,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)
Differences
between train and test IDI
par(op)
par(mfrow=c(1,1),cex=0.7)
testIDI <- list();
diffIDI <- list();
rawIDI <- univar$orderframe$IDI
rawIDITest <- univarTest$orderframe$IDI
testIDI$RAW <- rawIDITest
diffIDI$RAW <- rawIDITest-rawIDI
plot(rawIDI,rawIDITest-rawIDI,
xlab="TRAIN Test IDI",
ylab="Test:IDI-Train:IDI",
xlim=c(0,0.5),
ylim=c(-0.2,0.2),
pch=1,
col=colors[1],
main="Predict IDI Difference Between Test and Train")
IDeAP <- univarDe$orderframe$IDI[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
IDeAPTest <-univarDeTest$orderframe$IDI[!(rownames(univarDe$orderframe) %in% colnames(testDataFrame))]
testIDI$IDeAP <- IDeAP
testIDI$IDeAP_T <- IDeAPTest
diffIDI$IDeAP <- IDeAPTest-IDeAP
points(IDeAP,IDeAPTest-IDeAP,pch=2,col=colors[2])
IDeAS <- univarDeSpear$orderframe$IDI[!(rownames(univarDeSpearTest$orderframe) %in% colnames(testDataFrame))]
IDeASTest <- univarDeSpearTest$orderframe$IDI[!(rownames(univarDeSpearTest$orderframe) %in% colnames(testDataFrame))]
testIDI$IDeAS <- IDeAS
testIDI$IDeAS_T <- IDeASTest
diffIDI$IDeAS <- IDeASTest-IDeAS
points(IDeAS,IDeASTest-IDeAS,pch=3,col=colors[3])
DIDeAP <- univarDeDri$orderframe$IDI[!(rownames(univarDeDriTest$orderframe) %in% colnames(testDataFrame))]
DIDeAPTest <- univarDeDriTest$orderframe$IDI[!(rownames(univarDeDriTest$orderframe) %in% colnames(testDataFrame))]
testIDI$DIDeAP <- DIDeAP
testIDI$DIDeAP_T <- DIDeAPTest
diffIDI$DIDeAP <- DIDeAPTest-DIDeAP
points(DIDeAP,DIDeAPTest-DIDeAP,pch=4,col=colors[4])
DIDeAS <- univarDeDriSpear$orderframe$IDI[!(rownames(univarDeDriSpearTest$orderframe) %in% colnames(testDataFrame))]
DIDeASTest <- univarDeDriSpearTest$orderframe$IDI[!(rownames(univarDeDriSpearTest$orderframe) %in% colnames(testDataFrame))]
testIDI$DIDeAS <- DIDeAS
testIDI$DIDeAS_T <- DIDeASTest
diffIDI$DIDeAS <- DIDeASTest-DIDeAS
points(DIDeAS,DIDeASTest-DIDeAS,pch=5,col=colors[5])
PCA <- univarPCA$orderframe$IDI[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
PCATest <- univarPCATest$orderframe$IDI[!(rownames(univarPCA$orderframe) %in% colnames(testDataFrame))]
testIDI$PCA <- PCA
testIDI$PCA_T <- PCATest
diffIDI$PCA <- PCATest-PCA
points(PCA,PCATest-PCA,pch=6,col=colors[6])
EFA <- univarEFA$orderframe$IDI[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
EFATest <- univarEFATest$orderframe$IDI[!(rownames(univarEFA$orderframe) %in% colnames(testDataFrame))]
testIDI$EFA <- EFA
testIDI$EFA_T <- EFATest
diffIDI$EFA <- EFATest-EFA
points(EFA,EFATest-EFA,pch=7,col=colors[7])
WPCA <- univarWhite$orderframe$IDI[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
WPCATest <- univarWhiteTest$orderframe$IDI[!(rownames(univarWhite$orderframe) %in% colnames(testDataFrame))]
testIDI$WPCA <- WPCA
testIDI$WPCA_T <- WPCATest
diffIDI$WPCA <- WPCATest-WPCA
points(WPCA,WPCATest-WPCA,pch=8,col=colors[8])
names=c("Raw","IDeA:P","IDeA:S","DIDeA:P","DIDeA:S","PCA","EFA","White:PCA")
pchs=c(1,2,3,4,5,6,7,8)
legend("bottomright",names,col=colors,pch=pchs,cex=0.50)

par(op)
Violin of
differences
par(op)
par(mfrow=c(1,1),cex=0.7)
vioplot(diffIDI,
ylim=c(-0.2,0.2),
ylab="Test-Train",
main="Test-Train Paired Predict IDI Difference",
col=colors,
cex.axis=0.6,
las=2
)
stripchart(diffIDI, method = "jitter", col = "gray",
vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(diffIDI),lapply(diffIDI,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)
Distribution of
Predict IDI in latent Variables
par(op)
par(mfrow=c(1,1),cex=0.7)
colors2 <- length(testIDI)
colors2[1] <- colors[1];
colors2[2*(1:(length(colors)-1))] <- colors[2:length(colors)]
colors2[1+2*(1:(length(colors)-1))] <- colors[2:length(colors)]
vioplot(testIDI,
ylim=c(0.0,0.5),
ylab="Predict IDI",
main="Predict IDI of Latent Variables",
col=colors2,
cex.axis=0.6,
las=2
)
stripchart(testIDI, method = "jitter", col = "gray",
vertical = TRUE, pch = 1, add = TRUE,cex=0.35)
points(1:length(testIDI),lapply(testIDI,mean), pch = 18, col = "yellow", cex = 2.25)

par(op)
The tables
pander::pander(univarTest$orderframe[1:TopVariables,univariate_columns])
| V_904 |
0.224 |
0.125 |
0.264 |
0.118 |
0.896 |
0.0284 |
0.596 |
0.600 |
| V_918 |
0.231 |
0.130 |
0.271 |
0.120 |
0.874 |
0.0257 |
0.592 |
0.596 |
| V_916 |
0.230 |
0.129 |
0.270 |
0.119 |
0.883 |
0.0264 |
0.596 |
0.596 |
| V_914 |
0.228 |
0.128 |
0.269 |
0.119 |
0.890 |
0.0269 |
0.596 |
0.596 |
| V_912 |
0.227 |
0.127 |
0.267 |
0.118 |
0.893 |
0.0273 |
0.596 |
0.596 |
pander::pander(univarDeTest$orderframe[1:TopVariables,univariate_columns])
| La_V_1008 |
1.77e-04 |
1.94e-03 |
2.77e-03 |
1.24e-03 |
0.602 |
0.29673 |
0.742 |
0.871 |
| La_V_1154 |
-7.96e-06 |
1.72e-04 |
-2.07e-04 |
1.33e-04 |
0.932 |
0.11832 |
0.637 |
0.849 |
| La_V_1048 |
-4.38e-05 |
4.95e-05 |
1.11e-05 |
2.97e-05 |
0.505 |
-0.00487 |
0.533 |
0.849 |
| La_V_1046 |
-9.42e-04 |
3.65e-03 |
-5.11e-03 |
3.44e-03 |
0.721 |
0.28722 |
0.787 |
0.844 |
| La_V_1240 |
-7.30e-05 |
2.85e-04 |
-3.63e-04 |
2.11e-04 |
0.267 |
0.00547 |
0.483 |
0.822 |
pander::pander(univarDeSpearTest$orderframe[1:TopVariables,univariate_columns])
| La_V_1052 |
-1.60e-03 |
1.49e-03 |
8.17e-04 |
1.60e-03 |
0.393 |
0.343 |
0.804 |
0.911 |
| La_V_1100 |
-7.37e-03 |
1.84e-02 |
1.36e-02 |
1.65e-02 |
0.170 |
0.231 |
0.750 |
0.836 |
| La_V_1040 |
-2.55e-05 |
7.84e-05 |
8.28e-05 |
9.01e-05 |
0.564 |
0.224 |
0.692 |
0.831 |
| La_V_930 |
3.22e-05 |
1.12e-04 |
1.75e-04 |
1.38e-04 |
0.187 |
0.170 |
0.650 |
0.827 |
| La_V_1232 |
-2.75e-03 |
2.91e-03 |
-6.00e-03 |
3.24e-03 |
0.853 |
0.162 |
0.713 |
0.822 |
pander::pander(univarDeDriTest$orderframe[1:TopVariables,univariate_columns])
| La_V_1008 |
1.77e-04 |
1.94e-03 |
2.77e-03 |
1.24e-03 |
0.602 |
0.29673 |
0.742 |
0.871 |
| La_V_1154 |
-7.96e-06 |
1.72e-04 |
-2.07e-04 |
1.33e-04 |
0.932 |
0.11832 |
0.637 |
0.849 |
| La_V_1048 |
-4.38e-05 |
4.95e-05 |
1.11e-05 |
2.97e-05 |
0.505 |
-0.00487 |
0.533 |
0.849 |
| La_V_1046 |
-9.42e-04 |
3.65e-03 |
-5.11e-03 |
3.44e-03 |
0.721 |
0.28722 |
0.787 |
0.844 |
| La_V_1240 |
-7.30e-05 |
2.85e-04 |
-3.63e-04 |
2.11e-04 |
0.267 |
0.00547 |
0.483 |
0.822 |
pander::pander(univarDeDriSpearTest$orderframe[1:TopVariables,univariate_columns])
| La_V_1052 |
-1.60e-03 |
1.49e-03 |
8.17e-04 |
1.60e-03 |
0.393 |
0.343 |
0.804 |
0.911 |
| La_V_1100 |
-7.37e-03 |
1.84e-02 |
1.36e-02 |
1.65e-02 |
0.170 |
0.231 |
0.750 |
0.836 |
| La_V_1040 |
-2.55e-05 |
7.84e-05 |
8.28e-05 |
9.01e-05 |
0.564 |
0.224 |
0.692 |
0.831 |
| La_V_930 |
3.22e-05 |
1.12e-04 |
1.75e-04 |
1.38e-04 |
0.187 |
0.170 |
0.650 |
0.827 |
| La_V_1232 |
-2.75e-03 |
2.91e-03 |
-6.00e-03 |
3.24e-03 |
0.853 |
0.162 |
0.713 |
0.822 |
pander::pander(univarPCATest$orderframe[1:TopVariables,univariate_columns])
| RC11 |
-0.00596 |
0.0442 |
0.00596 |
0.0375 |
0.832 |
0.0165 |
0.608 |
0.596 |
| RC7 |
-0.04007 |
0.5290 |
0.04007 |
0.4199 |
0.962 |
0.0092 |
0.567 |
0.582 |
| RC17 |
-0.00138 |
0.0143 |
0.00138 |
0.0129 |
0.901 |
0.0112 |
0.571 |
0.582 |
| RC2 |
-16.85713 |
176.4865 |
16.85713 |
159.9626 |
0.838 |
0.0116 |
0.562 |
0.578 |
| RC5 |
-0.37045 |
2.6075 |
0.37045 |
2.7390 |
0.859 |
0.0197 |
0.575 |
0.578 |
pander::pander(univarEFATest$orderframe[1:TopVariables,univariate_columns])
| MR7 |
-0.00911 |
0.0134 |
0.00911 |
0.0347 |
0.285 |
0.07932 |
0.575 |
0.684 |
| MR2 |
-16.86057 |
176.5327 |
16.86057 |
160.0038 |
0.838 |
0.01157 |
0.562 |
0.578 |
| MR4 |
-0.36366 |
2.5412 |
0.36366 |
2.6783 |
0.860 |
0.01991 |
0.575 |
0.578 |
| MR8 |
-0.03165 |
0.4469 |
0.03165 |
0.3505 |
0.888 |
0.00846 |
0.562 |
0.578 |
| MR10 |
-0.00556 |
0.0453 |
0.00556 |
0.0425 |
0.884 |
0.01779 |
0.596 |
0.578 |
pander::pander(univarWhiteTest$orderframe[1:TopVariables,univariate_columns])
| L17 |
-0.553 |
0.992 |
0.351 |
0.806 |
0.921 |
0.2389 |
0.729 |
0.782 |
| L5 |
0.565 |
0.805 |
1.391 |
1.028 |
0.462 |
-0.0320 |
0.496 |
0.747 |
| L7 |
0.665 |
0.666 |
0.276 |
1.243 |
0.489 |
0.0692 |
0.704 |
0.711 |
| L2 |
-0.259 |
1.108 |
-0.909 |
0.785 |
0.999 |
0.1084 |
0.688 |
0.693 |
| L9 |
1.086 |
1.191 |
1.677 |
0.682 |
0.746 |
0.0320 |
0.608 |
0.680 |
topUni <- univar$orderframe$Name[1:TopVariables]
topDe <- univarDe$orderframe$Name[1:TopVariables]
topDeSpear <- univarDeSpear$orderframe$Name[1:TopVariables]
topDeDri <- univarDeDri$orderframe$Name[1:TopVariables]
topDeDriSpear <- univarDeDriSpear$orderframe$Name[1:TopVariables]
topPCA <- univarPCA$orderframe$Name[1:TopVariables]
topEFA <- univarEFA$orderframe$Name[1:TopVariables]
topPCAWhite <- univarWhite$orderframe$Name[1:TopVariables]
Model of top
variables
par(mfrow=c(1,2),cex=0.6)
lmRAW <- glm(paste(outcome,"~."),
trainDataFrame[,c(outcome,topUni)],
family="binomial")
prRaw <- predictionStats_binary(cbind(testDataFrame[,outcome],predict(lmRAW,testDataFrame)),"Top Raw",cex=0.75)
Top Raw
lmDe <- glm(paste(outcome,"~."),
DEdataframe[,c(outcome,topDe)],
family="binomial")
prDe <- predictionStats_binary(cbind(predTestDe[,outcome],predict(lmDe,predTestDe)),"Top IDeA:P",cex=0.75)
Top IDeA:P

lmDeSpear <- glm(paste(outcome,"~."),
DEdataframeSpear[,c(outcome,topDeSpear)],
family="binomial")
prSpear <- predictionStats_binary(cbind(predTestDeSpear[,outcome],predict(lmDeSpear,predTestDeSpear)),"Top IDeA:S",cex=0.75)
Top IDeA:S
lmDri <- glm(paste(outcome,"~."),
DriDEdataframe[,c(outcome,topDeDri)],
family="binomial")
prDri <- predictionStats_binary(cbind(predTestDe[,outcome],predict(lmDri,predTestDri)),"Top DIDeA:P",cex=0.75)
Top DIDeA:P

lmDriSpear <- glm(paste(outcome,"~."),
DriDEdataframeSpear[,c(outcome,topDeDriSpear)],
family="binomial")
prDriSpear <- predictionStats_binary(cbind(predTestDriSpear[,outcome],predict(lmDriSpear,predTestDriSpear)),"Top DIDeA:S",cex=0.7)
Top DIDeA:S
lmPCA <- glm(paste(outcome,"~."),
PCA_Train[,c(outcome,topPCA)],
family="binomial")
prPCA <- predictionStats_binary(cbind(PCA_Predicted[,outcome],predict(lmPCA,PCA_Predicted)),"Top PCA",cex=0.75)
Top PCA

lmEFA <- glm(paste(outcome,"~."),
EFA_Train[,c(outcome,topEFA)],
family="binomial")
prEFA <- predictionStats_binary(cbind(EFA_Predicted[,outcome],predict(lmEFA,EFA_Predicted)),"Top EFA",cex=0.75)
Top EFA
lmPCAW <- glm(paste(outcome,"~."),
PCAWhite_Train[,c(outcome,topPCAWhite)],
family="binomial")
prWPCA <- predictionStats_binary(cbind(PCAWhitePredicted[,outcome],predict(lmPCAW,PCAWhitePredicted)),"Top White:PCA",cex=0.75)
Top White:PCA

par(op)
The Performance
Tables and Plots
par(cex=0.6)
aucs <- prRaw$aucs
aucs <- rbind(aucs,prDe$aucs)
aucs <- rbind(aucs,prSpear$aucs)
aucs <- rbind(aucs,prDri$aucs)
aucs <- rbind(aucs,prDriSpear$aucs)
aucs <- rbind(aucs,prPCA$aucs)
aucs <- rbind(aucs,prEFA$aucs)
aucs <- rbind(aucs,prWPCA$aucs)
rownames(aucs) <- c("RAW",
"IDeA:P",
"IDeA:S",
"DIDeA:P",
"DIDeA:S",
"PCA",
"EFA",
"WPCA"
)
pander::pander(aucs)
| RAW |
0.771 |
0.597 |
0.945 |
| IDeA:P |
0.713 |
0.524 |
0.901 |
| IDeA:S |
0.808 |
0.644 |
0.972 |
| DIDeA:P |
0.713 |
0.524 |
0.901 |
| DIDeA:S |
0.808 |
0.644 |
0.972 |
| PCA |
0.688 |
0.492 |
0.883 |
| EFA |
0.717 |
0.530 |
0.903 |
| WPCA |
0.800 |
0.644 |
0.956 |
bpAUC <- barPlotCiError(as.matrix(aucs),
metricname = "ROC AUC",
thesets = "Test AUC",
themethod = rownames(aucs),
main = "ROC AUC",
offsets = c(0.5,1),
scoreDirection = ">",
ho=0.5,
args.legend = list(bg = "white",x="bottomleft",inset=c(0.0,0),cex=0.5),
col = terrain.colors(nrow(aucs))
)

berror <- prRaw$berror
berror <- rbind(berror,prDe$berror)
berror <- rbind(berror,prSpear$berror)
berror <- rbind(berror,prDri$berror)
berror <- rbind(berror,prDriSpear$berror)
berror <- rbind(berror,prPCA$berror)
berror <- rbind(berror,prEFA$berror)
berror <- rbind(berror,prWPCA$berror)
rownames(berror) <- rownames(aucs)
pander::pander(berror)
| RAW |
0.290 |
0.132 |
0.452 |
| IDeA:P |
0.355 |
0.183 |
0.517 |
| IDeA:S |
0.288 |
0.145 |
0.450 |
| DIDeA:P |
0.352 |
0.195 |
0.529 |
| DIDeA:S |
0.279 |
0.136 |
0.452 |
| PCA |
0.349 |
0.188 |
0.529 |
| EFA |
0.387 |
0.225 |
0.548 |
| WPCA |
0.325 |
0.176 |
0.487 |
bpBER <- barPlotCiError(as.matrix(berror),
metricname = "Balanced Error Rate",
thesets = "Test BER",
themethod = rownames(aucs),
main = "Balanced Error Rate",
offsets = c(0.5,1),
scoreDirection = "<",
ho=0.5,
args.legend = list(bg = "white",x="topleft",inset=c(0.0,0),cex=0.5),
col = terrain.colors(nrow(aucs))
)

par(op)